Reference: David M. Blei, Alp Kucukelbir & Jon D. McAuliffe (2017) Variational Inference: A Review for Statisticians, Journal of the American Statistical Association, 112:518, 859-877.
\(\displaystyle \begin{array}{{>{\displaystyle}l}} x_{i} |c_{i} ,\ \mu \ \sim \ N\left( c^{T}_{i} \mu ,\ 1\right)\\ \\ c_{i} \ \sim \ Categorical( 1/K,\ ...,\ 1/K)\\ \\ \mu _{k} \ \sim \ N\left( 0,\ \sigma ^{2}\right)\\ \\ q( c_{i} ;\ \varphi _{i}) \ \sim \ Categorical( \varphi _{i1} ,\ ...,\ \varphi _{iK})\\ \\ q( \mu _{k} ;\ m_{k} ,\ s_{k}) \ \sim \ N\left( m_{k} ,\ s^{2}_{k}\right)\\ \\ \end{array}\)
Note that
\[\begin{eqnarray*} ELBO = E_q[log\ p(x, c, \mu)] - E_q[log\ q(\mu, c)] \\ \\ \\ \end{eqnarray*}\]\(\displaystyle \begin{array}{{>{\displaystyle}l}} p( \mu _{k}) \ =\ \frac{1}{\sqrt{2\pi } \sigma } exp\left( -\frac{\mu ^{2}_{k}}{2\sigma ^{2}}\right) \\ \\ p( c_{i}) \ =\ \frac{1}{K} \\ \\ p( x_{i} |\mu ,\ c_{i}) \ =\ \frac{1}{\sqrt{2\pi }}\prod ^{K}_{k} exp\left( -\frac{( x_{i} -\mu _{k})^{2}}{2}\right)^{c_{ik}}\\ \\ p( x,c,\mu ) \ =\ \left(\prod ^{N}_{i} p( x_{i} ,c_{i} |\mu )\right) p( \mu ) \ =\ \left(\prod ^{N}_{i} p( x_{i} |c_{i} ,\mu ) p( c_{i})\right)\left(\prod ^{K}_{k} p( \mu _{k})\right) \ \\ \\ log( p( x,c,\mu )) \ =\ \sum ^{N}_{i} log( p( x_{i} |c_{i} ,\mu )) \ +\ \sum ^{N}_{i} log( p( c_{i})) \ +\ \sum ^{K}_{k} log( p( \mu _{k}))\\ \\ \\ \\ \ \sum ^{N}_{i} log( p( x_{i} |c_{i} ,\mu )) \ =\sum ^{N}_{i} \ log\left(\frac{1}{\sqrt{2\pi }}\right) \ +\ \sum ^{N}_{i}\sum ^{K}_{k} c_{ik}\left( -\frac{( x_{i} -\mu _{k})^{2}}{2}\right) \ =-Nlog\left(\sqrt{2\pi }\right) +\ \sum ^{N}_{i}\sum ^{K}_{k} c_{ik}\left( -\frac{x^{2}_{i} +\mu ^{2}_{k} -2x_{i} \mu _{k}}{2}\right) \ \\ \\ exp.ll\ =\ E\left( \ \sum ^{N}_{i} log( p( x_{i} |c_{i} ,\mu )) \ \right) \ =\ -Nlog\left(\sqrt{2\pi }\right) +\ \ \sum ^{N}_{i}\sum ^{K}_{k} \varphi _{ik}\left( -\frac{x^{2}_{i} +s^{2}_{k} +m^{2}_{k} -2x_{i} m_{k}}{2}\right)\\ \\ \\ \sum ^{N}_{i} log( p( c_{i})) \ =\ \sum ^{N}_{i} log\left(\frac{1}{K}\right) \ =\ -Nlog( K)\\ \\ \\ exp.pc\ =\ E\left(\sum ^{N}_{i} log( p( c_{i}))\right) \ =\ -Nlog( K)\\ \\ \\ \sum ^{K}_{k} log( p( \mu _{k})) \ =\ \sum ^{K}_{k} log\left(\frac{1}{\sqrt{2\pi } \sigma }\right) -\ \sum ^{K}_{k}\frac{\mu ^{2}_{k}}{2\sigma ^{2}} \ \ =\ -Klog\left( \sigma \sqrt{2\pi }\right) -\ \sum ^{K}_{k}\frac{\mu ^{2}_{k}}{2\sigma ^{2}}\\ \\ \\ exp.pm\ =E\left(\sum ^{K}_{k} log( p( \mu _{k}))\right) \ =\ -Klog\left( \sigma \sqrt{2\pi }\right) \ -\ \sum ^{K}_{k}\frac{s^{2}_{k} +m^{2}_{k}}{2\sigma ^{2}}\\ \end{array}\)
\(\displaystyle \begin{array}{{>{\displaystyle}l}} q( \mu _{k} ;\ m_{k} ,\ s_{k}) \ =\ \ \frac{1}{\sqrt{2\pi } s_{k}} exp\left( -\frac{( \mu _{k} -m_{k})^{2}}{2s^{2}_{k}}\right) ,\ q( c_{i} ;\ \varphi _{i}) \ =\ \prod ^{K}_{k} \varphi ^{c_{ik}}_{ik}\\ \ \ \ \ \ \ \ \ \ \ \ \\ q( \mu ,c) \ =\ \left(\prod ^{K}_{k} q( \mu _{k})\right)\left(\prod ^{N}_{i} q( c_{i})\right) \ \\ \\ log( q( \mu ,c)) \ =\ \sum ^{K}_{k} log( q( \mu _{k})) \ +\ \sum ^{N}_{i} log( q( c_{i}))\\ \\ \\ \\ \sum ^{K}_{k} log( q( \mu _{k})) \ =\ \sum ^{K}_{k} log\left(\frac{1}{\sqrt{2\pi } s_{k}}\right) \ -\ \sum ^{K}_{k}\frac{( \mu _{k} -m_{k})^{2}}{2s^{2}_{k}} \ =\ -\sum ^{K}_{k} log\left( s_{k}\sqrt{2\pi }\right) \ -\ \sum ^{K}_{k}\frac{\mu ^{2}_{k} +m^{2}_{k} -2\mu _{k} m_{k}}{2s^{2}_{k}} \ \\ \\ exp.vm\ =E\left(\sum ^{K}_{k} log( q( \mu _{k}))\right) \ =\ -\sum ^{K}_{k} log\left( s_{k}\sqrt{2\pi }\right) -\ \sum ^{K}_{k}\frac{m^{2}_{k} +s^{2}_{k} +m^{2}_{k} -2m^{2}_{k}}{2s^{2}_{k}} \ =\ -\sum ^{K}_{k} log\left( s_{k}\sqrt{2\pi }\right) \ -\ \frac{K}{2}\\ \\ \\ \sum ^{N}_{i} log( q( c_{i})) \ =\ \sum ^{N}_{i}\sum ^{K}_{k} c_{ik} log( \varphi _{ik})\\ \\ exp.vc\ =E\left(\sum ^{N}_{i} log( q( c_{i}))\right) =\ \sum ^{N}_{i}\sum ^{K}_{k} \varphi _{ik} log( \varphi _{ik}) \end{array}\)
\(\displaystyle ELBO\ =\ exp.ll\ +\ exp.pm\ +\ exp.pc\ -\ exp.vm\ -\ exp.vc\)
#' Compute the evidence lower bound (ELBO) for the mean-field variational
#' approximation of the Gaussian mixture model.
#'
#' ELBO = exp.ll + exp.pm + exp.pc - exp.vm - exp.vc (see derivation above).
#'
#' @param x numeric vector of N observations.
#' @param N number of observations.
#' @param K number of mixture components.
#' @param sigma prior standard deviation of the component means.
#' @param m numeric vector (length K) of variational means.
#' @param s numeric vector (length K) of variational standard deviations.
#' @param phi N x K matrix of variational responsibilities; each row sums
#'   to 1 and entries must be strictly positive (phi * log(phi) is taken).
#' @return the scalar ELBO.
calc_elbo <- function(x, N, K, sigma, m, s, phi){
  # Expected log-likelihood E_q[log p(x | c, mu)], vectorized over the
  # N x K grid: the (i, k) term is (x_i - m_k)^2 + s_k^2.
  quad <- sweep(outer(x, m, "-")^2, 2, s^2, "+")
  exp.ll <- -N*log(sqrt(2*pi)) - sum(phi * quad)/2
  # Expected prior of clustering assignments: uniform over K clusters.
  exp.pc <- -N*log(K)
  # Expected prior of component means: N(0, sigma^2) on each mu_k.
  exp.pm <- -K*log(sigma*sqrt(2*pi)) - sum((s^2 + m^2)/(2*sigma^2))
  # Expected variational factor of component means (negative entropy).
  exp.vm <- sum(-log(s*sqrt(2*pi))) - K/2
  # Expected variational factor of clustering assignments (neg. entropy).
  exp.vc <- sum(phi * log(phi))
  exp.ll + exp.pc + exp.pm - exp.vc - exp.vm
}
\(\displaystyle \begin{array}{{>{\displaystyle}l}} p( \mu _{k}) \ =\ \frac{1}{\sqrt{2\pi } \sigma } exp\left( -\frac{\mu ^{2}_{k}}{2\sigma ^{2}}\right) ,\ p( c_{i}) \ =\ \frac{1}{K} ,\ p( x_{i} |c_{i} ,\ \mu ) \ =\frac{1}{\sqrt{2\pi }}\prod ^{K}_{k} exp\left( -\frac{( x_{i} \ -\ \mu _{k})^{2}}{2}\right)^{{c_{i}}_{k}}\\ \\ \\ p( x ,\ c ,\ \mu ) \ =\ p( x,\ c\ |\ \mu ) p( \mu ) \ =\ \left(\prod ^{N}_{i} p( x_{i} |c_{i} ,\ \mu ) \ p( c_{i})\right)\left(\prod ^{K}_{k} \ p( \mu _{k}) \ \right) \ =\ \\ \\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ =\ C\left(\prod ^{N}_{i}\prod ^{K}_{k} exp\left( -\frac{( x_{i} \ -\ \mu _{k})^{2}}{2}\right)^{{c_{i}}_{k}}\right)\left(\prod ^{K}_{k} exp\left( -\frac{\mu ^{2}_{k}}{2\sigma ^{2}}\right)\right) ,\ \ where\ C\ is\ a\ constant.\\ \end{array}\)
\(\displaystyle \begin{array}{{>{\displaystyle}l}} \\ log( p( x,\ c ,\ \mu _{k} ,\mu _{-k})) \ =\ C\ +\ \sum ^{N}_{i}\sum ^{K}_{k}\left( -\ c_{ik}\frac{( x_{i} \ -\ \mu _{k})^{2}}{2}\right) \ +\ \sum ^{K}_{k} -\frac{\mu ^{2}_{k}}{2\sigma ^{2}} \ \\ \\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ =\ C\ -\ \frac{\mu ^{2}_{k}}{2\sigma ^{2}} \ -\ \sum ^{N}_{i}\left( c_{ik}\frac{( x_{i} \ -\ \mu _{k})^{2}}{2}\right)\\ \\ E_{c,\ \mu _{-k}}( log( p( x,\ c ,\ \mu _{k} ,\mu _{-k}))) \ =\ C\ -\ \frac{\mu ^{2}_{k}}{2\sigma ^{2}} \ -\ \sum ^{N}_{i}\left( E_{c_{i}}( c_{ik})\frac{( x_{i} \ -\ \mu _{k})^{2}}{2}\right)\\ \\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ =\ C\ -\ \ \frac{\mu ^{2}_{k}}{2\sigma ^{2}} \ -\sum ^{N}_{i}\left( E_{c_{i}}( c_{ik})\frac{x^{2}_{i} \ +\ \mu ^{2}_{k} \ -\ 2x_{i} \mu _{k}}{2}\right)\\ \\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ =\ C\ -\ \ \frac{\mu ^{2}_{k}}{2\sigma ^{2}} \ -\sum ^{N}_{i}\left( E_{c_{i}}( c_{ik})\frac{\mu ^{2}_{k} \ -\ 2x_{i} \mu _{k}}{2}\right)\\ \\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ =\ C\ -\ \ \frac{\mu ^{2}_{k}}{2\sigma ^{2}} \ -\sum ^{N}_{i}\frac{E_{c_{i}}( c_{ik}) \mu ^{2}_{k}}{2} \ +\ \sum ^{N}_{i} E_{c_{i}}( c_{ik}) x_{i} \mu _{k}\\ \\ \\ \\ E_{c_{i}}( c_{ik}) \ =\ \varphi _{ik}\\ \\ \\ q^{*}( \mu _{k}) \ \varpropto \ exp( E_{c,\ \mu _{-k}}( log( p( x,\ c ,\ \mu _{k} ,\mu _{-k})))) \ \varpropto \ exp\left( -\ \ \frac{\mu ^{2}_{k}}{2\sigma ^{2}} \ -\sum ^{N}_{i}\frac{\varphi _{ik} \mu ^{2}_{k}}{2} \ +\ \sum ^{N}_{i} \varphi _{ik} x_{i} \mu _{k}\right)\\ \\ \ \ \ \ \ \ \ \ \ \ \ =\ exp\left( -\ \ \left(\frac{1}{2\sigma ^{2}} \ +\sum ^{N}_{i}\frac{\varphi _{ik}}{2}\right) \mu ^{2}_{k} \ +\ \sum ^{N}_{i} \varphi _{ik} x_{i} \mu _{k}\right)\\ \\ \ \ \ \ \ \ \ \ \ \ \ \ =\ exp\left(\frac{}{}\frac{\mu ^{2}_{k} \ -\ \frac{\sum ^{N}_{i} \varphi _{ik} x_{i} 
\mu _{k}}{\frac{1}{2\sigma ^{2}} \ +\sum ^{N}_{i}\frac{\varphi _{ik}}{2}}}{\frac{1}{\frac{1}{2\sigma ^{2}} \ +\sum ^{N}_{i}\frac{\varphi _{ik}}{2}}}\right) \ =\ exp\left(\frac{}{}\frac{\mu ^{2}_{k} \ -\ 2\ \cdot \frac{\sum ^{N}_{i} \varphi _{ik} x_{i}}{\frac{1}{\sigma ^{2}} \ +\sum ^{N}_{i} \varphi _{ik}} \mu _{k}}{2\cdot \ \frac{1}{\frac{1}{\sigma ^{2}} \ +\sum ^{N}_{i} \varphi _{ik}}}\right)\\ \\ \\ For\ an\ appropriate\ posterior\ distribution,\ \int q^{*}( \mu _{k}) \ d\mu _{k} \ =\ 1\\ \\ \therefore \ \mu _{k} \ \sim \ N\left(\frac{\sum ^{N}_{i} \varphi _{ik} x_{i}}{\frac{1}{\sigma ^{2}} \ +\sum ^{N}_{i} \varphi _{ik}} ,\ \frac{1}{\frac{1}{\sigma ^{2}} \ +\sum ^{N}_{i} \varphi _{ik}}\right)\\ \\ that\ is:\ m_{k} \ =\ \frac{\sum ^{N}_{i} \varphi _{ik} x_{i}}{\frac{1}{\sigma ^{2}} \ +\sum ^{N}_{i} \varphi _{ik}} ,\ s^{2}_{k} \ =\ \frac{1}{\frac{1}{\sigma ^{2}} \ +\sum ^{N}_{i} \varphi _{ik}} \end{array}\)
\(\displaystyle \begin{array}{{>{\displaystyle}l}} \\ log( p( x,\ \mu ,\ c_{i} ,\ c_{-i})) \ =\ C\ +\ \sum ^{N}_{i}\sum ^{K}_{k}\left( -\ c_{ik}\frac{( x_{i} \ -\ \mu _{k})^{2}}{2}\right) \ +\ \sum ^{K}_{k} -\frac{\mu ^{2}_{k}}{2\sigma ^{2}} \ \\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ =\ C\ -\sum ^{K}_{k} \ c_{ik}\frac{( x_{i} \ -\ \mu _{k})^{2}}{2}\\ \\ E_{\mu ,\ c_{-i}}( log( p( x,\ \mu ,\ c_{ik} ,\ c_{-ik}))) \ =\ \ C\ \ -\sum ^{K}_{k}\left(\frac{c_{ik} x^{2}_{i}}{2} \ +\ \frac{c_{ik} E_{\mu _{k}}\left( \mu ^{2}_{k}\right)}{2} -\ c_{ik} x_{i} E_{\mu _{k}}( \mu _{k})\right)\\ \\ \\ E_{\mu _{k}}( \mu _{k}) \ =\ m_{k}\\ \\ \\ E_{\mu _{k}}\left( \mu ^{2}_{k}\right) \ =\ Var( \mu _{k}) \ +\ E^{2}_{\mu _{k}}( \mu _{k}) \ =\ s^{2}_{k} \ +\ m^{2}_{k}\\ \\ \\ q^{*}( c_{i}) \ \varpropto \ exp( E_{\mu ,\ c_{-i}}( log( p( x,\ \mu ,\ c_{i} ,\ c_{-i})))) \ \varpropto \ exp\left(\sum ^{K}_{k}\left( -\frac{c_{ik} x^{2}_{i}}{2} \ -\ \frac{c_{ik} \ \left( s^{2}_{k} \ +\ m^{2}_{k}\right)}{2} +\ c_{ik} x_{i} m_{k}\right)\right)\\ \\ \ \ \ \ \ \ \ \ \ \ \ =\ exp\left( -\frac{x^{2}_{i}}{2} \ +\sum ^{K}_{k}\left( \ -\ \frac{c_{ik} \ \left( s^{2}_{k} \ +\ m^{2}_{k}\right)}{2} +\ c_{ik} x_{i} m_{k}\right)\right) \ \varpropto exp\left(\sum ^{K}_{k}\left( \ -\ \frac{c_{ik} \ \left( s^{2}_{k} \ +\ m^{2}_{k}\right)}{2} +\ c_{ik} x_{i} m_{k}\right)\right)\\ \\ \varphi _{ik} =\ C\ \cdot \ exp\left( \ -\ \frac{s^{2}_{k} \ +\ m^{2}_{k}}{2} +\ x_{i} m_{k}\right)\\ \\ For\ an\ appropriate\ posterior\ distribution,\ \sum ^{K}_{k} \varphi _{ik} \ =\ 1\\ \\ \varphi _{ik} \ =\ \frac{exp\left( \ -\ \frac{s^{2}_{k} \ +\ m^{2}_{k}}{2} +\ x_{i} m_{k}\right)}{\sum ^{K}_{k} exp\left( \ -\ \frac{s^{2}_{k} \ +\ m^{2}_{k}}{2} +\ x_{i} m_{k}\right)} \end{array}\)
#' CAVI update for the variational factor q(mu_k) of one component mean.
#'
#' Implements m_k = sum(phi_ik x_i) / (1/sigma^2 + sum(phi_ik)) and
#' s_k^2 = 1 / (1/sigma^2 + sum(phi_ik)) from the derivation above.
#'
#' @param x numeric vector of N observations.
#' @param N number of observations (kept for interface compatibility;
#'   not used in the computation).
#' @param sigma prior standard deviation of the component means.
#' @param phi_k numeric vector of length N: responsibilities of each
#'   observation for this component.
#' @return list with updated variational mean `m` and standard deviation `s`.
update.mean <- function(x, N, sigma, phi_k){
  # Posterior precision = prior precision + effective sample size;
  # computed once instead of twice.
  precision <- 1/sigma^2 + sum(phi_k)
  m <- sum(phi_k * x)/precision
  s <- sqrt(1/precision)
  return(list(m = m, s = s))
}
#' CAVI update for the variational cluster assignment q(c_i) of one
#' observation.
#'
#' phi_ik is proportional to exp(-(s_k^2 + m_k^2)/2 + x_i m_k), normalized
#' over k (see derivation above).
#'
#' @param x_i a single observation.
#' @param m numeric vector (length K) of current variational means.
#' @param s numeric vector (length K) of current variational sds.
#' @return numeric vector of length K of responsibilities summing to 1.
update.cluster <- function(x_i, m, s){
  log_phi <- -(s^2 + m^2)/2 + x_i*m
  # Subtract the max before exponentiating (log-sum-exp trick): the
  # original exp() overflowed to Inf for large |x_i * m|, yielding NaN
  # after normalization. The subtraction cancels in the ratio, so the
  # result is unchanged where the original was finite.
  phi_i <- exp(log_phi - max(log_phi))
  phi_i/sum(phi_i)
}
#' Coordinate ascent variational inference for the Gaussian mixture model.
#'
#' Alternates closed-form updates of the mean factors q(mu_k) and the
#' assignment factors q(c_i), tracking the ELBO after each sweep, until
#' successive ELBO values differ by less than `tol`.
#'
#' @param x numeric vector of N observations.
#' @param N number of observations.
#' @param K number of mixture components.
#' @param sigma prior standard deviation of the component means.
#' @param m,s initial variational means / standard deviations (length K).
#' @param phi initial N x K responsibility matrix (rows sum to 1).
#' @param tol convergence tolerance on the ELBO change.
#' @param maxiter safety cap on the number of sweeps (new, backward-
#'   compatible); the original looped unconditionally and could hang if
#'   the ELBO never settled within `tol`. A warning is issued if hit.
#' @return list with final `m`, `s`, `phi`, and the full `elbo` trace.
CAVI <- function(x, N, K, sigma, m, s, phi, tol = 1e-10, maxiter = 10000){
  elbo <- calc_elbo(x, N, K, sigma, m, s, phi)
  iter <- 1
  # Sweep until the ELBO converges (or the safety cap is reached).
  repeat{
    # Update each component-mean factor given current responsibilities.
    for(k in seq_len(K)){
      new.mean.factor <- update.mean(x, N, sigma, phi[, k])
      m[k] <- new.mean.factor$m
      s[k] <- new.mean.factor$s
    }
    # Update each observation's responsibilities given the new means.
    for(i in seq_len(N)){
      phi[i, ] <- update.cluster(x[i], m, s)
    }
    iter <- iter + 1
    elbo[iter] <- calc_elbo(x, N, K, sigma, m, s, phi)
    # Check convergence of the ELBO.
    if(abs(elbo[iter] - elbo[iter - 1]) < tol){
      break
    }
    if(iter > maxiter){
      warning("CAVI did not converge within ", maxiter, " iterations",
              call. = FALSE)
      break
    }
  }
  return(list(m = m, s = s, phi = phi, elbo = elbo))
}
# Simulate a 1-D Gaussian mixture with three well-separated components,
# then set the priors and random initial variational parameters.
set.seed(123)
#set N and K
N <- 300
r.K <- 3
#real mean
r.mu <- c(-3, 0 ,3)
#real variance
r.var <- 1
#real cluster assignment: N/r.K observations per component, one-hot encoded
r.c <- rep(c(1,2,3),each = N/r.K)
r.c <- model.matrix(~0 + as.factor(r.c))
#simulate data
# NOTE(review): rnorm's third argument is the standard deviation, not the
# variance; here r.var = 1 so the two coincide, but rename if r.var changes.
x <- rnorm(N, r.c %*% r.mu, r.var)
# Scatter plot of the simulated data, colored by true component mean.
ggplot() + geom_point(aes(x = 1:N, y = x, col = as.factor(r.c %*% r.mu)),size = 2) + scale_color_discrete('real mean') + xlab('') + ylab('')
#prior sigma
sigma <- 3
#model K (matches the true number of components here)
K <- 3
#set initial phi: random responsibilities, normalized so each row sums to 1
phi <- t(replicate(N, runif(K)))
phi <- phi/rowSums(phi)
#set initial variational mean and variance
m <- rnorm(K, sd = 2)
s <- runif(K, 0, 3)
Now perform CAVI.
# Visualize the initial (random) responsibilities as a stacked ribbon:
# each row of phi is turned into cumulative sums so the K bands partition [0, 1].
ribbon.plot <- phi
for(i in 1:N){
ribbon.plot[i,] <- cumsum(ribbon.plot[i,])
}
ggplot(data = data.frame(ribbon.plot)) + geom_ribbon(aes(1:N,ymin = 0, ymax = X1,fill = '1')) +
geom_ribbon(aes(1:N,ymin = X1, ymax = X2, fill = '2')) +
geom_ribbon(aes(1:N,ymin = X2, ymax = X3, fill = '3')) + xlab('samples')
# Run CAVI with K = 3.
result <- CAVI(x,N,K,sigma,m,s,phi)
#plot the result: converged responsibilities as the same stacked ribbon
ribbon.plot <- result$phi
for(i in 1:N){
ribbon.plot[i,] <- cumsum(ribbon.plot[i,])
}
ggplot(data = data.frame(ribbon.plot)) + geom_ribbon(aes(1:N,ymin = 0, ymax = X1,fill = '1')) +
geom_ribbon(aes(1:N,ymin = X1, ymax = X2, fill = '2')) +
geom_ribbon(aes(1:N,ymin = X2, ymax = X3, fill = '3')) + xlab('samples')
# ELBO trace across iterations (should be non-decreasing for a correct CAVI).
plot(result$elbo, type = 'l', lwd = 4, col = alpha('steelblue2', 0.7), xlab = 'iteration', ylab = 'elbo')
# Estimated component means/sds; component labels may be permuted vs r.mu.
result$m
## [1] 3.055890 -2.900992 -0.275898
result$s
## [1] 0.0978388 0.1014882 0.1006167
Run one trial first.
# Random-restart study: run CAVI from 101 random initializations and keep
# the solution with the highest converged ELBO (CAVI finds local optima only).
#set initial phi
phi <- t(replicate(N, runif(K)))
phi <- phi/rowSums(phi)
#set initial variational mean and variance
m <- rnorm(K, sd = 2)
s <- runif(K, 0, 3)
result <- CAVI(x,N,K,sigma,m,s,phi, tol = 1e-8)
plot(result$elbo, type = 'l', lwd = 4, col = alpha(1, 0.5), xlab = 'iteration', ylab = 'elbo', ylim = c(-800,-680))
max.elbo <- result$elbo[length(result$elbo)]
# BUG FIX: the original assigned `final.mphi` here, leaving `final.phi`
# undefined whenever no later trial beat the first one.
final.m <- result$m; final.s <- result$s; final.phi <- result$phi
for(i in 1:100){
  #set initial phi
  phi <- t(replicate(N, runif(K)))
  phi <- phi/rowSums(phi)
  #set initial variational mean and variance
  m <- rnorm(K, sd = 2)
  s <- runif(K, 0, 3)
  result <- CAVI(x,N,K,sigma,m,s,phi, tol = 1e-8)
  # Overlay this trial's ELBO trace (lines() takes no axis labels).
  lines(result$elbo, lwd = 4, col = alpha(i+1, 0.5))
  #update m, s, phi if the converged elbo beats the previous trials' maximum
  final.elbo <- result$elbo[length(result$elbo)]
  if(final.elbo > max.elbo){
    max.elbo <- final.elbo
    final.m <- result$m; final.s <- result$s; final.phi <- result$phi
  }
}
final.m
## [1] -2.9009861 3.0558941 -0.2758861
final.s
## [1] 0.10148796 0.09783893 0.10061680
# Misspecified model: fit with K = 2 components (true number is 3).
#prior sigma
sigma <- 3
#model K
K <- 2
#set initial phi: random responsibilities, normalized so each row sums to 1
phi <- t(replicate(N, runif(K)))
phi <- phi/rowSums(phi)
#set initial variational mean and variance
m <- rnorm(K, sd = 2)
s <- runif(K, 0, 3)
Now perform CAVI.
# Stacked-ribbon view of the initial responsibilities (2 bands for K = 2).
ribbon.plot <- phi
for(i in 1:N){
ribbon.plot[i,] <- cumsum(ribbon.plot[i,])
}
ggplot(data = data.frame(ribbon.plot)) + geom_ribbon(aes(1:N,ymin = 0, ymax = X1,fill = '1')) +
geom_ribbon(aes(1:N,ymin = X1, ymax = X2, fill = '2')) + xlab('samples')
# Run CAVI with the misspecified K = 2.
result <- CAVI(x,N,K,sigma,m,s,phi)
#plot the result: converged responsibilities
ribbon.plot <- result$phi
for(i in 1:N){
ribbon.plot[i,] <- cumsum(ribbon.plot[i,])
}
ggplot(data = data.frame(ribbon.plot)) + geom_ribbon(aes(1:N,ymin = 0, ymax = X1,fill = '1')) +
geom_ribbon(aes(1:N,ymin = X1, ymax = X2, fill = '2')) + xlab('samples')
# ELBO trace for the K = 2 fit.
plot(result$elbo, type = 'l', lwd = 4, col = alpha('steelblue2', 0.7), xlab = 'iteration', ylab = 'elbo')
# With K = 2 the fitted means merge/split the true clusters.
result$m
## [1] -1.960490 2.588159
result$s
## [1] 0.07702491 0.08714823
# Overspecified model: fit with K = 4 components (true number is 3).
#prior sigma
sigma <- 3
#model K
K <- 4
#set initial phi: random responsibilities, normalized so each row sums to 1
phi <- t(replicate(N, runif(K)))
phi <- phi/rowSums(phi)
#set initial variational mean and variance
m <- rnorm(K, sd = 2)
s <- runif(K, 0, 3)
Now perform CAVI.
# Stacked-ribbon view of the initial responsibilities (4 bands for K = 4).
ribbon.plot <- phi
for(i in 1:N){
ribbon.plot[i,] <- cumsum(ribbon.plot[i,])
}
ggplot(data = data.frame(ribbon.plot)) + geom_ribbon(aes(1:N,ymin = 0, ymax = X1,fill = '1')) +
geom_ribbon(aes(1:N,ymin = X1, ymax = X2, fill = '2')) + geom_ribbon(aes(1:N,ymin = X2, ymax = X3, fill = '3')) +
geom_ribbon(aes(1:N,ymin = X3, ymax = X4, fill = '4')) + xlab('samples')
# Run CAVI with the overspecified K = 4.
result <- CAVI(x,N,K,sigma,m,s,phi)
#plot the result: converged responsibilities
ribbon.plot <- result$phi
for(i in 1:N){
ribbon.plot[i,] <- cumsum(ribbon.plot[i,])
}
ggplot(data = data.frame(ribbon.plot)) + geom_ribbon(aes(1:N,ymin = 0, ymax = X1,fill = '1')) +
geom_ribbon(aes(1:N,ymin = X1, ymax = X2, fill = '2')) + geom_ribbon(aes(1:N,ymin = X2, ymax = X3, fill = '3')) +
geom_ribbon(aes(1:N,ymin = X3, ymax = X4, fill = '4')) + xlab('samples')
# ELBO trace for the K = 4 fit.
plot(result$elbo, type = 'l', lwd = 4, col = alpha('steelblue2', 0.7), xlab = 'iteration', ylab = 'elbo')
# With K = 4 one true cluster is typically split between two fitted means.
result$m
## [1] 3.2022502 0.4890398 -1.2569644 -3.1162235
result$s
## [1] 0.1039963 0.1230576 0.1223090 0.1153941